'''
Author: Yizhe Li 1362800827@qq.com
Date: 2024-06-25 10:36:51
LastEditors: Yizhe Li 1362800827@qq.com
LastEditTime: 2024-06-25 12:15:56
FilePath: /ubuntu/HuibanApi-Py/get_search_result.py
Description: Scrape Bing (cn.bing.com) search results for a query and return the title, abstract and link of each result.
'''
import logging
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
# from langchain_text_splitters import RecursiveCharacterTextSplitter

def search_with_bing(query, timeout=10):
    """Search cn.bing.com for *query* and return the parsed result entries.

    Args:
        query: Search phrase. It is percent-encoded before being placed in
            the request URL.
        timeout: Seconds to wait for the server before ``requests`` aborts
            the call. Without a timeout a stalled connection blocks forever.

    Returns:
        A list of dicts with the keys ``'title'``, ``'abstract'`` and
        ``'link'``, one for each ``#b_results > li`` element that contains
        an ``<h2>``. ``'abstract'`` is ``''`` when the entry has no caption
        paragraph and ``'link'`` is ``None`` when no anchor (or no ``href``)
        is found. The list is empty when the page has no such elements.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    logger = logging.getLogger(__name__)

    url = f'https://cn.bing.com/search?q={quote(query)}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }

    resp = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP failures explicitly instead of parsing an error page.
    resp.raise_for_status()
    logger.debug("Bing responded %s (%d chars)", resp.status_code, len(resp.text))

    soup = BeautifulSoup(resp.text, 'html.parser')

    data = []
    for parent in soup.select('#b_results > li'):
        heading = parent.select_one('h2')
        if heading is None:
            # Entries without a heading carry no title to report; skip them.
            continue

        # Not every entry has a caption paragraph; dereferencing a missing
        # one would abort the whole search.
        caption = parent.select_one('div.b_caption > p')
        abstract = '' if caption is None else caption.text.replace('\u2002', ' ')

        # Prefer the anchor in the b_tpcn block; fall back to the anchor
        # inside the heading when that block is absent.
        anchor = parent.select_one('div.b_tpcn > a')
        if anchor is None:
            anchor = heading.select_one('a')
        link = None if anchor is None else anchor.get('href')

        data.append({
            'title': heading.text,
            'abstract': abstract,
            'link': link,
        })
    return data

if __name__ == '__main__':
    # Manual smoke test. Guarded so that importing this module does not
    # issue a network request or print to stdout.
    print(search_with_bing('上海 今天 天气'))

# search_results = search_with_bing('今天上海的天气如何？')
